Setup

start_time <- Sys.time() 
# Import functions
root = "./"
source(file.path(root,"general_functions.R"))
import_parameters(params)
## **** __Utilized Cores__ **** = 4$subsetGenes
## [1] "protein_coding"
## 
## $subsetCells
## [1] "F"
## 
## $resolution
## [1] "0.001"
## 
## $resultsPath
## [1] "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.001__perplexity-30__nCores-4"
## 
## $nCores
## [1] "4"
## 
## $perplexity
## [1] "30"
load(file.path(resultsPath, "scRNAseq_results.RData"))
# resultsPath <- "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.2__perplexity-40__nCores-4" 

library(Seurat)
library(cowplot)
library(ggplot2)
library(dplyr) 
library(data.table) 
library(readxl) 
library(reshape2)
library(ggrepel)
library(plotly)

library(GeneOverlap) # BiocManager::install("GeneOverlap") 
library(enrichR) #BiocManager::install("enrichR")

library(monocle) # BiocManager::install("monocle")   
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
 
#  
if(getwd()=="/Users/schilder/Desktop/PD_scRNAseq"){
  allGenes <- F
  test.use <- "wilcox"
}else{
  allGenes <- T
  # MAST not install on Minerva...
  test.use <- "wilcox"}
allGenes 
## [1] TRUE

Convert back to Seurat

CDS_to_Seurat <- function(cds, export_PC=F, export_UMAP=F, export_tSNE=F){ 
  # sum(colnames(mDAT) != colnames( mDAT@reducedDimS)) 
  ## Manually save reduced dimensions
  if(export_PC==T){
    mDAT$PC1 <- mDAT@normalized_data_projection[,1]
    mDAT$PC2 <- mDAT@normalized_data_projection[,2]
    mDAT$PC3 <- mDAT@normalized_data_projection[,3] 
  }
  if(export_UMAP==T){
    mDAT$UMAP1 <- mDAT@reducedDimA[1,]
    mDAT$UMAP2 <- mDAT@reducedDimA[2,]
    mDAT$UMAP3 <- mDAT@reducedDimA[3,]
  }
  DAT <- exportCDS(mDAT, export_to = "Seurat", export_all = T)
  DAT@scale.data <- DAT@data #Data was already scaled in Monocle
  # DAT <- Seurat::AddMetaData(DAT, pData(mDAT)[c("garnett_cluster","cell_type","cluster_ext_type")])
  return(DAT)
}
DAT <- CDS_to_Seurat(mDAT, export_PC = T, export_UMAP = T)
## Scaling data matrix
head(DAT@meta.data) 
##                  nGene nUMI orig.ident singlet.or.not.binary       HTO
## GTCATTTTCCGCATCT  2035 6729  RAJ_13357                     1 NYUMD0011
## ATAAGAGAGGAGTTGC  1914 6718  RAJ_13357                     1   BID0076
## CATTATCCATGGTCTA  1971 6156  RAJ_13357                     1  MSMD0067
## CTTAGGAGTGGCGAAT  1893 6392  RAJ_13357                     1 NYUMD0011
## CATCAGAAGCACCGCT  1967 6110  RAJ_13357                     1  MSMD0067
## GCGGGTTAGTAGCCGA  1868 5977  RAJ_13357                     1 NYUMD0011
##                         ID percent.mito res.2 res.1 res.0.6 res.0.3
## GTCATTTTCCGCATCT  MSMD0207   0.04191439     0     1       0       0
## ATAAGAGAGGAGTTGC  BIMD0076   0.04362066     0     1       0       0
## CATTATCCATGGTCTA NYUMD0015   0.03086921     0     1       0       0
## CTTAGGAGTGGCGAAT  MSMD0207   0.03613892     0     1       0       0
## CATCAGAAGCACCGCT NYUMD0015   0.04437531    19    10       8       0
## GCGGGTTAGTAGCCGA  MSMD0207   0.03514056     6     2       0       0
##                           barcode      dx     mut Ethnicity Gender Age
## GTCATTTTCCGCATCT GTCATTTTCCGCATCT      PD      PD     White      M  71
## ATAAGAGAGGAGTTGC ATAAGAGAGGAGTTGC      PD     GBA     White      M  47
## CATTATCCATGGTCTA CATTATCCATGGTCTA control control     White      M  51
## CTTAGGAGTGGCGAAT CTTAGGAGTGGCGAAT      PD      PD     White      M  71
## CATCAGAAGCACCGCT CATCAGAAGCACCGCT control control     White      M  51
## GCGGGTTAGTAGCCGA GCGGGTTAGTAGCCGA      PD      PD     White      M  71
##                  Size_Factor louvain_component Cluster garnett_cluster
## GTCATTTTCCGCATCT    5.117674                 1       1               4
## ATAAGAGAGGAGTTGC    5.109379                 1       1               4
## CATTATCCATGGTCTA    4.661479                 1       1               1
## CTTAGGAGTGGCGAAT    4.844786                 1       1               1
## CATCAGAAGCACCGCT    4.610054                 1       1               6
## GCGGGTTAGTAGCCGA    4.508861                 1       1               6
##                  cell_type cluster_ext_type cluster_ext_type_filt      PC1
## GTCATTTTCCGCATCT Monocytes        Monocytes             Monocytes 21.74363
## ATAAGAGAGGAGTTGC Monocytes        Monocytes             Monocytes 20.51278
## CATTATCCATGGTCTA Monocytes        Monocytes             Monocytes 20.73024
## CTTAGGAGTGGCGAAT Monocytes        Monocytes             Monocytes 21.48480
## CATCAGAAGCACCGCT Monocytes        Monocytes             Monocytes 18.94471
## GCGGGTTAGTAGCCGA Monocytes        Monocytes             Monocytes 17.77910
##                          PC2       PC3    UMAP1    UMAP2    UMAP3
## GTCATTTTCCGCATCT  0.72274985 -2.813874 1.365752 1.513548 1.161469
## ATAAGAGAGGAGTTGC  1.02458096 -3.651205 1.354717 1.488010 1.172568
## CATTATCCATGGTCTA -0.03820978 -2.694958 1.351607 1.519749 1.158323
## CTTAGGAGTGGCGAAT -0.42949109 -1.943783 1.369097 1.527969 1.158835
## CATCAGAAGCACCGCT  4.19589928 -4.857566 1.305207 1.470326 1.143383
## GCGGGTTAGTAGCCGA  2.61114660 -7.713996 1.303913 1.455874 1.130939

Biomarker Expression

make_markerDF <- function(DAT, markerList){
  markerData <- DAT@data[row.names(DAT@data) %in% markerList,] %>% t() %>%
    as.matrix() %>%  data.table(keep.rownames = T, key = "rn")
  markerData[markerData$markerList[1]==0,] <- NA
  markerData[markerData$markerList[2]==0,] <- NA 
  colnames(markerData) <- c("Cell",paste("Gene", rep(1:length(markerList)), sep=""))
  return(markerData)
}

gene_gene_plot <- function(DAT, markerList, colorby, title="", plot=T, legend=T, filterZeros=T){ 
  
  ## Efficiently merge using data.table
  markerData <- make_markerDF(DAT, markerList)
  dt1 <- data.table(markerData, keep.rownames = "Cell", key = "Cell") %>% copy()
  dt2 <- data.table(DAT@meta.data, keep.rownames = "Cell", key = "Cell") %>% copy()
  row.names(dt2) <- dt2$Cell
  markerDT <- dt1[dt2]
  if(filterZeros==T){markerDT <- subset(markerDT, Gene1!=0 & Gene2!=0)}
  
  p <- ggplot(data = markerDT, aes(x=Gene1, y=Gene2 )) +
    stat_density_2d(aes(fill = ..level..), 
                    geom = "polygon", colour="purple", bins = 10, size=.1) +
    geom_point(aes(color=as.factor(eval(parse(text=colorby)))), shape=16, stroke=0, size=2, alpha=.5) +
    guides(colour = guide_legend(title=colorby, override.aes = list(alpha = 1))) + 
      scale_color_manual(values = pretty_colors(mDAT, var = colorby)) +
    labs(x=markerList[1], y=markerList[2], title=title) +
    geom_smooth(method="lm")
  if(legend==F){p <- p + theme(legend.position = "none")}
  if(plot==T){print(p)}
  return(p)
} 

Monocyte Markers

p <- gene_gene_plot(DAT, c("CD14", "FCGR3A"), colorby="cluster_ext_type_filt", 
               title="Monocyte Subtype Markers")

HLA Genes vs. LRRK2

# Identify HLA gene names
d <- DAT@data@Dimnames[[1]] 
HLA_genes  <- d[startsWith(d, "HLA-DR")] 
# Plot
HLA_1 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[1]), colorby="cluster_ext_type_filt", 
               title="", plot = F, legend=F)
HLA_2 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[2]), colorby="cluster_ext_type_filt", 
               title="", plot = F, legend=F)
HLA_3 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[3]), colorby="cluster_ext_type_filt", 
               title="", plot = F, legend=F)
plot_grid(HLA_1, HLA_2, HLA_3)

DGE: CD16+ vs. CD16– Monocytes

Remove filters to get all DEGs.

clustDAT <- SubsetData(DAT, subset.name = "Cluster", accept.value = c(1,2), do.scale = F)

DEGs_monocytes <- runDGE(DAT, meta_var = "Cluster", group1 = 1, group2 = 2, 
       allGenes = allGenes, test.use = test.use)
## Warning: Removed 1 rows containing missing values (geom_point).

# DEGs_monocytes <- read.csv("Results/MonocyteSubtype.markers.csv", row.names = 1)
createDT(DEGs_monocytes, caption="DEGs between cluster 1 (CD16-- monocytes) and cluster 2 (CD16+ monocytes")
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
write.csv(DEGs_monocytes, file.path(resultsPath, "MonocyteSubtype.markers.csv"), quote = F, row.names = T)

Identify Cluster-specific Biomarkers

identify_unique_markers <-function(DAT, clusterA, clusterB, allGenes=F, test.use="wilcox"){ 
  DAT <- SetIdent(DAT, ident.use =  DAT@meta.data$Cluster)  
  if(allGenes==F){
      clustA.markers <- FindMarkers(DAT, ident.1=clusterA, min.pct = 0.25, 
                                    only.pos = F, test.use = test.use)
      clustB.markers <- FindMarkers(DAT, ident.1=clusterB, min.pct = 0.25, 
                                    only.pos = F, test.use = test.use) 
  }else{
      clustA.markers <- FindMarkers(DAT, ident.1=clusterA, 
                                    only.pos = F, test.use = test.use,
                                    logfc.threshold = -Inf, min.pct = -Inf, 
                                    min.cells.group = -Inf,min.cells.gene = -Inf,
                                    min.diff.pct = -Inf)
      clustB.markers <- FindMarkers(DAT, ident.1=clusterB,  
                                    only.pos = F, test.use = test.use,
                                    logfc.threshold = -Inf, min.pct = -Inf,
                                    min.cells.group = -Inf,min.cells.gene = -Inf,
                                    min.diff.pct = -Inf)
  } 
  clustA.markers <- cbind(Gene=row.names(clustA.markers), clustA.markers)
  clustB.markers <- cbind(Gene=row.names(clustB.markers), clustB.markers)
  
  clustA.uniqueMarkers <- clustA.markers[!(row.names(clustA.markers) %in% row.names(clustB.markers)),] %>%
    subset(p_val_adj<=0.05) %>% mutate(Cluster=clusterA)
  clustB.uniqueMarkers <- clustB.markers[!(row.names(clustB.markers) %in% row.names(clustA.markers)),] %>%
    subset(p_val_adj<=0.05) %>% mutate(Cluster=clusterB)
  
  uniqueMarkers <- rbind(clustA.uniqueMarkers, clustB.uniqueMarkers)
  row.names(uniqueMarkers) <- uniqueMarkers$Gene
  # difference <- abs( length(row.names(clustA.uniqueMarkers)) -
  #                      length(row.names(clustB.uniqueMarkers) ) )
  # uniqueMarkers <- data.frame(ClusterA_markers=c(row.names(clustA.uniqueMarkers), rep("",difference) ),
  #                             ClusterB_markers=row.names(clustB.uniqueMarkers))
  write.csv(uniqueMarkers,
            file.path(resultsPath,"unique_cluster_markers.csv"), 
            quote = F, row.names = F)
  createDT(uniqueMarkers, "Unique/Mutually Exclusive Markers of Cluster 1 and Cluster 2") 
  return(uniqueMarkers)
}
# MUST use full dataset w/ all clusters (DAT) 
uniqueMarkers <- identify_unique_markers(DAT, clusterA = 1, clusterB = 2, 
                                         allGenes=T, test.use = test.use) 

DGE Across Clusters

Disease

# clustDAT@meta.data$dx %>% unique()
# FDR <- 0.05/dim(DEGs)[1]
# subset(DEGs, p_val<FDR)
DEGs <- runDGE(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control", 
               allGenes = F, test.use = test.use) 

Mutation

# clustDAT@meta.data$dx %>% unique()
DEGs <- runDGE(clustDAT, meta_var = "mut", group1 = "PD", group2 = "GBA", 
               allGenes = F, test.use = test.use)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).

DGE Within Clusters

Disease

DGE_within_clusters(clustDAT,  meta_var = "dx", group1 = "PD", group2 = "control", 
                    clusterList = c(1,2), allGenes = F, test.use = test.use)

Cluster 1: PD vs. control

[1] “Not showing table”

Cluster 2: PD vs. control

[1] “Not showing table”

Mutation

DGE_within_clusters(DAT, meta_var = "mut", group1 = "PD", group2 = "GBA", 
                    clusterList = c(1,2), allGenes = F, test.use = test.use)

Cluster 1: PD vs. GBA

[1] “Not showing table”

## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).

Cluster 2: PD vs. GBA

[1] “Not showing table”

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_text_repel).

DEG Enrichment w/ enrichR

enrichr_dbs <- c("KEGG_2018", "Reactome_2016",
                 "GO_Biological_Process_2018", "GO_Molecular_Function_2018", "GO_Cellular_Component_2018", 
                 "Rare_Diseases_AutoRIF_ARCHS4_Predictions", "Human_Gene_Atlas")
# createDT(enrichR::listEnrichrDbs(), "Enrichr Databases")
 
geneList <- data.frame(Gene=row.names(DEGs_monocytes), 
     Weight=scales::rescale(length(DEGs_monocytes$p_val_adj):1))

results <- enrichr(genes = geneList, databases = enrichr_dbs ) 

Uploading data to Enrichr…

## Error in curl::curl_fetch_memory(url, handle = handle): Failed to connect to amp.pharm.mssm.edu port 80: No route to host
for (db in enrichr_dbs){
  cat('\n')
  cat("##",db,"\n")  
  # res <- subset(results[[db]], Adjusted.P.value<=0.05)
  createDT_html(results[[db]], paste("Enrichr Results:",db))
  cat('\n')
}  

KEGG_2018

## Error in crosstalk::is.SharedData(data): object 'results' not found

Save Results

save.image(file.path(resultsPath, "scRNAseq_results.RData"))

end_time <- Sys.time()
end_time - start_time
## Time difference of 4.521736 hours
cat("\n\n")
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
## 
## Matrix products: default
## BLAS/LAPACK: /hpc/packages/minerva-common/intel/parallel_studio_xe_2018/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
## 
## locale:
## [1] C
## 
## attached base packages:
##  [1] splines   stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2      garnett_0.1.4       monocle_2.10.1     
##  [4] DDRTree_0.1.5       irlba_2.3.3         VGAM_1.1-1         
##  [7] Biobase_2.42.0      BiocGenerics_0.28.0 enrichR_1.0        
## [10] GeneOverlap_1.18.0  plotly_4.7.1        ggrepel_0.8.0      
## [13] reshape2_1.4.3      readxl_1.1.0        data.table_1.11.8  
## [16] dplyr_0.7.6         Seurat_2.3.4        Matrix_1.2-14      
## [19] cowplot_0.9.3       ggplot2_3.1.0      
## 
## loaded via a namespace (and not attached):
##   [1] snow_0.4-3           backports_1.1.2      Hmisc_4.1-1         
##   [4] plyr_1.8.4           igraph_1.2.4         lazyeval_0.2.1      
##   [7] crosstalk_1.0.0      densityClust_0.3     fastICA_1.2-1       
##  [10] digest_0.6.18        foreach_1.4.4        htmltools_0.3.6     
##  [13] viridis_0.5.1        lars_1.2             gdata_2.18.0        
##  [16] magrittr_1.5         checkmate_1.8.5      cluster_2.0.7-1     
##  [19] mixtools_1.1.0       ROCR_1.0-7           limma_3.38.2        
##  [22] matrixStats_0.53.1   R.utils_2.7.0        docopt_0.6.1        
##  [25] colorspace_1.3-2     sparsesvd_0.1-4      crayon_1.3.4        
##  [28] jsonlite_1.5         bindr_0.1.1          survival_2.42-6     
##  [31] zoo_1.8-3            iterators_1.0.10     ape_5.1             
##  [34] glue_1.3.0           gtable_0.2.0         kernlab_0.9-26      
##  [37] prabclus_2.2-6       DEoptimR_1.0-8       scales_1.0.0        
##  [40] pheatmap_1.0.10      mvtnorm_1.0-8        Rcpp_1.0.0          
##  [43] metap_0.9            dtw_1.20-1           xtable_1.8-2        
##  [46] viridisLite_0.3.0    htmlTable_1.12       reticulate_1.10     
##  [49] foreign_0.8-70       bit_1.1-14           proxy_0.4-22        
##  [52] mclust_5.4.1         SDMTools_1.1-221     Formula_1.2-3       
##  [55] DT_0.4               tsne_0.1-3           htmlwidgets_1.2     
##  [58] httr_1.3.1           FNN_1.1              gplots_3.0.1        
##  [61] RColorBrewer_1.1-2   fpc_2.1-11           acepack_1.4.1       
##  [64] modeltools_0.2-22    ica_1.0-2            pkgconfig_2.0.2     
##  [67] R.methodsS3_1.7.1    flexmix_2.3-14       nnet_7.3-12         
##  [70] later_0.7.3          labeling_0.3         tidyselect_0.2.4    
##  [73] rlang_0.3.1          cellranger_1.1.0     munsell_0.5.0       
##  [76] tools_3.5.1          ggridges_0.5.0       evaluate_0.11       
##  [79] stringr_1.4.0        yaml_2.1.19          knitr_1.20          
##  [82] bit64_0.9-7          fitdistrplus_1.0-9   robustbase_0.93-1.1 
##  [85] caTools_1.17.1       purrr_0.2.5          RANN_2.6.1          
##  [88] pbapply_1.3-4        nlme_3.1-137         mime_0.5            
##  [91] slam_0.1-43          R.oo_1.22.0          hdf5r_1.0.0         
##  [94] compiler_3.5.1       rstudioapi_0.7       curl_3.2            
##  [97] png_0.1-7            tibble_2.0.1         stringi_1.3.1       
## [100] lattice_0.20-35      trimcluster_0.1-2.1  HSMMSingleCell_1.2.0
## [103] pillar_1.3.1         combinat_0.0-8       lmtest_0.9-36       
## [106] bitops_1.0-6         httpuv_1.4.5         R6_2.4.0            
## [109] latticeExtra_0.6-28  promises_1.0.1       KernSmooth_2.23-15  
## [112] gridExtra_2.3        codetools_0.2-15     MASS_7.3-50         
## [115] gtools_3.8.1         assertthat_0.2.0     rjson_0.2.20        
## [118] rprojroot_1.3-2      withr_2.1.2          qlcMatrix_0.9.7     
## [121] diptest_0.75-7       doSNOW_1.0.16        grid_3.5.1          
## [124] rpart_4.1-13         tidyr_0.8.1          class_7.3-14        
## [127] rmarkdown_1.10       segmented_0.5-3.0    Rtsne_0.13          
## [130] shiny_1.1.0          base64enc_0.1-3